Part 1: PCA with penguins - wrangling and scaling the data

# Build the PCA input: body mass plus every column whose name ends in "_mm".
# drop_na() with no column arguments removes a row if ANY remaining column
# is NA, so there is no need to list the columns.
penguin_measurements <- drop_na(
  select(penguins, body_mass_g, ends_with("_mm"))
)

# scale() centres and standardises each column so that grams and millimetres
# are comparable; prcomp() then runs the principal component analysis.
penguin_pca <- prcomp(scale(penguin_measurements))

# Loadings (rotation matrix): one row per variable, one column per
# principal component.
penguin_pca[["rotation"]]
##                          PC1         PC2        PC3        PC4
## body_mass_g        0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm     0.4552503 0.597031143  0.6443012 -0.1455231
## bill_depth_mm     -0.4003347 0.797766572 -0.4184272  0.1679860
## flipper_length_mm  0.5760133 0.002282201 -0.2320840  0.7837987

Creating a biplot using the autoplot() function

# Keep every column of penguins (so aesthetics such as species colour are
# available to the plot), but drop the rows that have an NA in any of the
# columns that went into the PCA.
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))

# Biplot of the PCA, with points coloured by species.
penguin_biplot <- autoplot(
  penguin_pca,
  data = penguin_complete,
  colour = "species",
  loadings = TRUE,       # draw the loading arrows
  loadings.label = TRUE  # label each arrow with its variable name
)

penguin_biplot + theme_classic()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

# Sum of the variance explained by the two axes = total variance captured by the biplot

Part 2: ggplot customization

Read in an Excel file and do some wrangling
  • graph
# Read the landings workbook and standardise the column names.
landings_raw <- read_excel(here("data", "foss_landings.xlsx"))
landings_named <- janitor::clean_names(landings_raw)

# Lower-case the VALUES of every character column (the column names were
# already handled by clean_names()).
landings_lower <- mutate(landings_named, across(where(is.character), tolower))

# Overwrite nmfs_name in place rather than adding a new column.
# str_sub(end = -4) keeps everything up to the fourth-from-last character,
# i.e. it drops the last THREE characters of every name.
# NOTE(review): presumably this strips a trailing marker such as " **";
# confirm against the data that every name carries that suffix.
landings_trimmed <- mutate(
  landings_lower,
  nmfs_name = str_sub(nmfs_name, end = -4)
)

# Keep only the records flagged as public.
fish_noaa <- filter(landings_trimmed, confidentiality == "public")

make a customized graph

# Line chart of pounds by year, one line per nmfs_name. The legend is
# switched off on the line layer.
fish_plot <- ggplot(fish_noaa, mapping = aes(x = year, y = pounds)) +
  geom_line(mapping = aes(colour = nmfs_name), show.legend = FALSE) +
  theme_minimal()

# Display the static version of the plot.
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p = fish_plot)